In [1]:
import pandas as pd
In [3]:
data_assign = pd.read_csv("Car Price.csv")
In [5]:
data_assign
Out[5]:
| Brand | Model | Year | Selling_Price | KM_Driven | Fuel | Seller_Type | Transmission | Owner | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Maruti | Maruti 800 AC | 2007 | 60000 | 70000 | Petrol | Individual | Manual | First Owner |
| 1 | Maruti | Maruti Wagon R LXI Minor | 2007 | 135000 | 50000 | Petrol | Individual | Manual | First Owner |
| 2 | Hyundai | Hyundai Verna 1.6 SX | 2012 | 600000 | 100000 | Diesel | Individual | Manual | First Owner |
| 3 | Datsun | Datsun RediGO T Option | 2017 | 250000 | 46000 | Petrol | Individual | Manual | First Owner |
| 4 | Honda | Honda Amaze VX i-DTEC | 2014 | 450000 | 141000 | Diesel | Individual | Manual | Second Owner |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4335 | Hyundai | Hyundai i20 Magna 1.4 CRDi (Diesel) | 2014 | 409999 | 80000 | Diesel | Individual | Manual | Second Owner |
| 4336 | Hyundai | Hyundai i20 Magna 1.4 CRDi | 2014 | 409999 | 80000 | Diesel | Individual | Manual | Second Owner |
| 4337 | Maruti | Maruti 800 AC BSIII | 2009 | 110000 | 83000 | Petrol | Individual | Manual | Second Owner |
| 4338 | Hyundai | Hyundai Creta 1.6 CRDi SX Option | 2016 | 865000 | 90000 | Diesel | Individual | Manual | First Owner |
| 4339 | Renault | Renault KWID RXT | 2016 | 225000 | 40000 | Petrol | Individual | Manual | First Owner |
4340 rows × 9 columns
In [7]:
data_assign.head()
Out[7]:
| Brand | Model | Year | Selling_Price | KM_Driven | Fuel | Seller_Type | Transmission | Owner | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Maruti | Maruti 800 AC | 2007 | 60000 | 70000 | Petrol | Individual | Manual | First Owner |
| 1 | Maruti | Maruti Wagon R LXI Minor | 2007 | 135000 | 50000 | Petrol | Individual | Manual | First Owner |
| 2 | Hyundai | Hyundai Verna 1.6 SX | 2012 | 600000 | 100000 | Diesel | Individual | Manual | First Owner |
| 3 | Datsun | Datsun RediGO T Option | 2017 | 250000 | 46000 | Petrol | Individual | Manual | First Owner |
| 4 | Honda | Honda Amaze VX i-DTEC | 2014 | 450000 | 141000 | Diesel | Individual | Manual | Second Owner |
In [9]:
data_assign.describe()
Out[9]:
| Year | Selling_Price | KM_Driven | |
|---|---|---|---|
| count | 4340.000000 | 4.340000e+03 | 4340.000000 |
| mean | 2013.090783 | 5.041273e+05 | 66215.777419 |
| std | 4.215344 | 5.785487e+05 | 46644.102194 |
| min | 1992.000000 | 2.000000e+04 | 1.000000 |
| 25% | 2011.000000 | 2.087498e+05 | 35000.000000 |
| 50% | 2014.000000 | 3.500000e+05 | 60000.000000 |
| 75% | 2016.000000 | 6.000000e+05 | 90000.000000 |
| max | 2020.000000 | 8.900000e+06 | 806599.000000 |
In [11]:
data_assign.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 4340 entries, 0 to 4339 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Brand 4340 non-null object 1 Model 4340 non-null object 2 Year 4340 non-null int64 3 Selling_Price 4340 non-null int64 4 KM_Driven 4340 non-null int64 5 Fuel 4340 non-null object 6 Seller_Type 4340 non-null object 7 Transmission 4340 non-null object 8 Owner 4340 non-null object dtypes: int64(3), object(6) memory usage: 305.3+ KB
In [13]:
data_assign.columns
Out[13]:
Index(['Brand', 'Model', 'Year', 'Selling_Price', 'KM_Driven', 'Fuel',
'Seller_Type', 'Transmission', 'Owner'],
dtype='object')
In [15]:
data_assign.columns.tolist()
Out[15]:
['Brand', 'Model', 'Year', 'Selling_Price', 'KM_Driven', 'Fuel', 'Seller_Type', 'Transmission', 'Owner']
In [17]:
Unique_data = {}
for column in data_assign.columns:
Unique_data[column] = data_assign[column].nunique()
In [19]:
Unique_data
Out[19]:
{'Brand': 29,
'Model': 1491,
'Year': 27,
'Selling_Price': 445,
'KM_Driven': 770,
'Fuel': 5,
'Seller_Type': 3,
'Transmission': 2,
'Owner': 5}
In [21]:
import matplotlib.pyplot as plt
In [23]:
fuel_counts = data_assign['Fuel'].value_counts()
plt.bar(fuel_counts.keys(), fuel_counts.values)
plt.title("Fuel Type Distribution")
plt.xlabel("Fuel Type")
plt.ylabel("Number of Cars")
plt.show()
In [25]:
seller_counts = data_assign['Seller_Type'].value_counts()
plt.pie(seller_counts.values,labels=seller_counts.index)
plt.title("Cars by Seller Type")
plt.figure()
plt.show()
<Figure size 640x480 with 0 Axes>
In [27]:
plt.figure(figsize=(12,6))
plt.hist(data_assign['KM_Driven'], color='green', edgecolor='black')
plt.title("Distribution of KM_Driven")
plt.xlabel("KM Driven")
plt.ylabel("Number of Cars")
plt.grid(axis='y')
plt.show()
In [29]:
owner_counts = data_assign['Owner'].value_counts()
plt.pie(owner_counts.values,labels=owner_counts.index)
plt.title("Distributed of Cars based on their Ownership Status")
plt.figure()
plt.show()
<Figure size 640x480 with 0 Axes>
Linear Regressions
In [31]:
X = data_assign[['KM_Driven']]
y = data_assign['Selling_Price']
In [33]:
from sklearn.model_selection import train_test_split
In [35]:
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=42)
In [37]:
from sklearn.linear_model import LinearRegression
In [39]:
model = LinearRegression()
model
Out[39]:
LinearRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LinearRegression()
In [41]:
model.fit(X_train, y_train)
Out[41]:
LinearRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LinearRegression()
In [43]:
lin_pred = model.predict(X_test)
In [45]:
lin_pred
Out[45]:
array([ 474691.65184692, 600033.79171264, 565214.69123636,
330429.94369959, 521990.57255097, 618771.25914954,
630134.82484869, 604763.68378304, 569289.49325501,
564559.60118461, 526720.46462137, 545640.03290299,
545640.03290299, 389553.59457965, 618700.31076849,
642222.06403461, 629167.56192029, 427392.73114288,
498341.11219894, 455772.08356531, 521990.57255097,
545640.03290299, 581114.22343102, 592938.95360703,
474691.65184692, 545640.03290299, 458137.02960051,
498341.11219894, 545640.03290299, 654427.55052229,
498341.11219894, 628413.14413506, 491246.27409334,
595303.89964223, 604763.68378304, 403743.27079086,
628413.14413506, 604763.68378304, 474691.65184692,
654427.55052229, 474691.65184692, 601216.26473024,
569289.49325501, 370634.02629803, 451042.1914949 ,
427392.73114288, 609493.57585345, 356444.35008682,
498341.11219894, 469961.75977652, 299685.64524196,
569289.49325501, 533011.22107501, 545640.03290299,
574019.38532541, 518242.13308517, 474691.65184692,
235832.10229151, 656224.90950904, 590384.81188901,
498341.11219894, 380093.81043884, 616588.41395905,
465231.86770611, 474691.65184692, 581114.22343102,
533011.22107501, 539642.52975772, 380093.81043884,
498341.11219894, 592938.95360703, 616588.41395905,
427392.73114288, 498341.11219894, 474691.65184692,
559829.7091142 , 119949.7465666 , 553900.78940395,
557464.763079 , 474712.93636124, 498341.11219894,
498341.11219894, 521990.57255097, 617534.39237313,
498341.11219894, 332794.88973479, 119949.7465666 ,
487675.20558018, 652062.60448708, 380093.81043884,
613509.25422122, 380093.81043884, 569289.49325501,
592938.95360703, 592938.95360703, 380093.81043884,
633145.4011515 , 474691.65184692, 462866.92167091,
454653.46409066, 626048.19809986, 539491.17321146,
-45596.47589755, 474691.65184692, 611177.41743051,
652062.60448708, 640237.87431107, 403743.27079086,
569289.49325501, 213078.95648683, 451042.1914949 ,
566924.54721981, 569289.49325501, 635701.90781556,
576384.33136062, 136504.36881302, 545640.03290299,
498341.11219894, 602398.73774784, 530170.92088673,
582533.19105214, 521498.66377564, 427392.73114288,
581114.22343102, 344619.6199108 , 427392.73114288,
484151.43598773, 498341.11219894, 618953.35999425,
521990.57255097, 555161.30564071, 592938.95360703,
484208.19469258, 545640.03290299, 616848.55802292,
548004.97893819, 498341.11219894, 545640.03290299,
652062.60448708, 640237.87431107, 635701.90781556,
427392.73114288, 380093.81043884, 609493.57585345,
518242.13308517, 394283.48665005, 531450.35669177,
309145.42938277, 500706.05823415, 380093.81043884,
560756.76796 , 474098.05039209, 628413.14413506,
604763.68378304, 380093.81043884, 592938.95360703,
533815.30272698, 451042.1914949 , 602398.73774784,
403743.27079086, 604763.68378304, 474691.65184692,
500706.05823415, 545640.03290299, 640237.87431107,
434487.56924849, 533224.06621818, 460461.77155311,
451042.1914949 , 451042.1914949 , 380093.81043884,
451042.1914949 , 451042.1914949 , 545640.03290299,
190898.12762266, 538793.51413108, 642602.82034628,
436852.51528369, 490418.54298102, 474691.65184692,
226372.3181507 , 384823.70250924, 601381.8109527 ,
628413.14413506, 604763.68378304, 383168.2402846 ,
498341.11219894, 573823.09480449, 427392.73114288,
525537.99160377, 451042.1914949 , 604763.68378304,
559829.7091142 , 604763.68378304, 533815.30272698,
285495.96903075, 380093.81043884, 546056.26340518,
564051.13778704, 652062.60448708, 609403.70790411,
545640.03290299, 320970.15955878, 521990.57255097,
569289.49325501, 451042.1914949 , 498341.11219894,
477056.59788213, 652062.60448708, 616588.41395905,
636217.46605123, 486516.38202293, 474691.65184692,
564559.60118461, 533815.30272698, 403743.27079086,
510165.84237496, 380093.81043884, 616848.55802292,
581114.22343102, 581114.22343102, 451042.1914949 ,
477056.59788213, 640237.87431107, 296138.22618916,
58461.14965134, 508983.36935735, 261846.50867873,
501309.11947312, 427392.73114288, 545640.03290299,
414276.74043165, 451042.1914949 , 649697.65845188,
557464.763079 , 661498.73916754, 451042.1914949 ,
569289.49325501, 451042.1914949 , 261846.50867873,
469961.75977652, 536719.45645821, 498341.11219894,
604763.68378304, 521990.57255097, 569289.49325501,
427392.73114288, 372800.31686627, 462866.92167091,
604763.68378304, 535352.51764986, 652062.60448708,
604763.68378304, 592938.95360703, 555161.30564071,
546056.26340518, 545640.03290299, 558789.13285871,
498341.11219894, 555099.8170438 , 598891.52277763,
529085.41065657, 616848.55802292, 471461.13556284,
285495.96903075, 451042.1914949 , 427392.73114288,
403743.27079086, 628413.14413506, 582296.69644862,
474691.65184692, 474691.65184692, 569289.49325501,
474691.65184692, 380093.81043884, 640237.87431107,
526720.46462137, 545640.03290299, 657620.22766981,
474691.65184692, 639031.75183312, 474691.65184692,
427392.73114288, 427392.73114288, 474691.65184692,
545640.03290299, 545640.03290299, 342254.6738756 ,
498341.11219894, 451042.1914949 , 474691.65184692,
647086.75802902, 474691.65184692, 569289.49325501,
361174.24215722, 592938.95360703, 498341.11219894,
581114.22343102, 616848.55802292, 498341.11219894,
268413.96381849, 451042.1914949 , 498341.11219894,
562194.6551494 , 451042.1914949 , 427392.73114288,
474691.65184692, 345934.52990638, 652062.60448708,
332794.88973479, 451042.1914949 , 576384.33136062,
545640.03290299, 474691.65184692, 370634.02629803,
498341.11219894, 451042.1914949 , 380093.81043884,
474691.65184692, 639031.75183312, 498341.11219894,
519429.33599484, 592938.95360703, 592938.95360703,
380093.81043884, 380093.81043884, 486516.38202293,
616588.41395905, 32446.74326412, 555480.57335546,
510941.5446745 , 309145.42938277, 328064.99766439,
531450.35669177, 507644.80990143, 380093.81043884,
478522.86442395, 261846.50867873, 661522.38862789,
451042.1914949 , 474691.65184692, 498341.11219894,
609493.57585345, 569289.49325501, 521990.57255097,
309145.42938277, 380093.81043884, 380093.81043884,
526720.46462137, 644967.76638148, 573156.18002256,
285495.96903075, 628413.14413506, 592938.95360703,
529002.63754534, 553900.78940395, 616588.41395905,
318605.21352358, 498341.11219894, 637872.92827587,
645776.57792552, 451042.1914949 , 545640.03290299,
498341.11219894, 520770.2603968 , 628413.14413506,
463399.03452883, 564587.98053703, 521990.57255097,
495876.83843026, 380093.81043884, 380093.81043884,
629595.61715266, 380093.81043884, 344619.6199108 ,
474691.65184692, 555099.8170438 , 462866.92167091,
521990.57255097, 604763.68378304, 661522.38862789,
451042.1914949 , 501843.59727708, 434487.56924849,
601216.26473024, 380093.81043884, 581114.22343102,
427392.73114288, 600021.96698246, 460501.97563571,
540910.14083258, 474691.65184692, 432122.62321328,
569289.49325501, 654427.55052229, 507800.89633975,
375922.04563274, 620135.83301186, 595918.78561139,
143599.20691862, 451042.1914949 , 639031.75183312,
510165.84237496, 521990.57255097, 498341.11219894,
592938.95360703, 403743.27079086, 474691.65184692,
540910.14083258, 514895.73444536, 395229.46506413,
451042.1914949 , 516787.69127352, 493611.22012854,
521990.57255097, 616588.41395905, 628413.14413506,
474691.65184692, 592938.95360703, 592938.95360703,
403743.27079086, 521990.57255097, 638270.23920979,
543275.08686779, 640237.87431107, 474691.65184692,
602081.83497912, 427392.73114288, 583479.16946622,
451042.1914949 , 451042.1914949 , 482422.660436 ,
498341.11219894, 625416.75750846, 552734.87100859,
607128.62981824, 380093.81043884, 415568.00096687,
467596.81374132, 510165.84237496, 380093.81043884,
474691.65184692, 380093.81043884, 498341.11219894,
536180.24876218, 614223.46792385, 498856.67043462,
413203.05493167, 427392.73114288, 380093.81043884,
545640.03290299, 474691.65184692, 605563.03554294,
-163843.77765766, 545640.03290299, 616588.41395905,
581114.22343102, 498341.11219894, 427392.73114288,
571890.93389373, 427392.73114288, 569289.49325501,
513240.27222072, 273671.23885474, 637399.93906883,
427392.73114288, 661285.89402437, 403743.27079086,
592938.95360703, 569289.49325501, 496094.4134655 ,
403743.27079086, 454353.11594418, 640237.87431107,
427392.73114288, 545640.03290299, 521990.57255097,
559713.82675848, 546056.26340518, 581114.22343102,
403743.27079086, 451042.1914949 , 545640.03290299,
569289.49325501, 498341.11219894, 451042.1914949 ,
436852.51528369, 543986.93562438, 545640.03290299,
451042.1914949 , 380093.81043884, 521990.57255097,
640237.87431107, 633852.52001603, 540910.14083258,
616588.41395905, 493611.22012854, 498341.11219894,
498341.11219894, 498341.11219894, -45596.47589755,
427392.73114288, 348015.68241736, 429480.97849196,
654427.55052229, 616588.41395905, 141234.26088342,
285495.96903075, 427392.73114288, 604763.68378304,
441582.40735409, 616588.41395905, 569289.49325501,
539491.17321146, 640237.87431107, 652062.60448708,
427392.73114288, 451042.1914949 , 604763.68378304,
592938.95360703, 309145.42938277, 592938.95360703,
545640.03290299, 596167.10494508, 462781.78361365,
498341.11219894, 521990.57255097, 592938.95360703,
640237.87431107, 555099.8170438 , 569289.49325501,
474691.65184692, 592052.09884383, 498341.11219894,
503071.00426935, 611255.46064967, 569289.49325501,
380093.81043884, 592938.95360703, 581114.22343102,
451042.1914949 , 628413.14413506, 451042.1914949 ,
380093.81043884, 442965.90078469, 427392.73114288,
616588.41395905, 143599.20691862, 498341.11219894,
491750.00759884, 498341.11219894, 451042.1914949 ,
581114.22343102, 553900.78940395, 545640.03290299,
608873.95999222, 451042.1914949 , 526947.49944075,
380093.81043884, 521990.57255097, 432122.62321328,
552836.56368811, 564559.60118461, 566865.42356893,
521990.57255097, 592938.95360703, 502342.60089051,
548004.97893819, 427392.73114288, 576384.33136062,
247893.32707104, 467596.81374132, 604763.68378304,
569289.49325501, 545640.03290299, 474691.65184692,
593175.44821055, 498341.11219894, 569289.49325501,
569289.49325501, 569289.49325501, 474691.65184692,
523445.01436262, 521990.57255097, 380093.81043884,
474691.65184692, 380093.81043884, 576384.33136062,
521990.57255097, 451042.1914949 , 567184.69128368,
474691.65184692, 427392.73114288, 569289.49325501,
498341.11219894, 581114.22343102, 498341.11219894,
651589.61528004, 640237.87431107, 484624.42519477,
567466.11986187, 592938.95360703, 618953.35999425,
427392.73114288, 380093.81043884, 607128.62981824,
403743.27079086, 604763.68378304, 540910.14083258,
596531.3066345 , 533815.30272698, 479421.54391733,
547501.24543269, -45596.47589755, 515680.89652905,
498341.11219894, 592938.95360703, 134943.50442978,
604763.68378304, 614223.46792385, 427392.73114288,
628413.14413506, 616588.41395905, 552351.74975089,
545640.03290299, 498341.11219894, 403743.27079086,
498341.11219894, 529085.41065657, 451042.1914949 ,
538545.19479738, 380093.81043884, 571131.78621643,
637872.92827587, 521990.57255097, 498341.11219894,
521990.57255097, 427392.73114288, 474691.65184692,
498341.11219894, 545640.03290299, 628413.14413506,
427392.73114288, 443278.07366133, 503071.00426935,
498341.11219894, 427392.73114288, 530078.68799136,
623683.25206466, 604763.68378304, 569289.49325501,
498341.11219894, 423019.94592379, 647332.71241668,
451042.1914949 , 566628.92896541, 628413.14413506,
451042.1914949 , 526720.46462137, 403743.27079086,
406744.38730953, 543275.08686779, 530078.68799136,
602398.73774784, 635507.98224067, 574019.38532541,
530078.68799136, 578749.27739582, 451042.1914949 ,
521990.57255097, 569289.49325501, 482732.46836661,
624667.0696153 , 639031.75183312, 403743.27079086,
403743.27079086, 645204.260985 , 451042.1914949 ,
604763.68378304, 633143.03620547, 434518.31354694,
604763.68378304, 524710.26049145, 533815.30272698,
592938.95360703, 538545.19479738, 498341.11219894,
332794.88973479, 545640.03290299, 581114.22343102,
616588.41395905, 582533.19105214, 380093.81043884,
640237.87431107, 628413.14413506, 592938.95360703,
474691.65184692, 510165.84237496, 380093.81043884,
520098.6157228 , 495976.16616374, 403743.27079086,
309145.42938277, 597668.84567744, 403743.27079086,
477598.17052419, 616588.41395905, 536180.24876218,
521990.57255097, 616588.41395905, 661522.38862789,
474691.65184692, 548004.97893819, 332794.88973479,
623683.25206466, 623683.25206466, 474691.65184692,
571654.43929021, 276036.18488994, 451042.1914949 ,
214547.58797469, 521990.57255097, 581114.22343102,
474691.65184692, 510165.84237496, 498341.11219894,
628413.14413506, 521990.57255097, 514895.73444536,
380093.81043884, 380093.81043884, 469961.75977652,
427392.73114288, 555161.30564071, 645776.57792552,
628413.14413506, 566827.58443236, 607128.62981824,
311510.37541797, 501888.53125175, 389643.46252898,
545640.03290299, 628413.14413506, 589706.07237691,
498341.11219894, 639833.46853905, 521990.57255097,
406744.38730953, 545640.03290299, 571654.43929021,
329389.3674441 , 403743.27079086, 380093.81043884,
604763.68378304, 415568.00096687, 569289.49325501,
514895.73444536, 451042.1914949 , 616588.41395905,
661498.73916754, 566924.54721981, 190898.12762266,
521990.57255097, 592938.95360703, 403743.27079086,
479421.54391733, 604763.68378304, 592938.95360703,
427392.73114288, 521990.57255097, 380093.81043884,
628413.14413506, 581114.22343102, 661522.38862789,
474691.65184692, 498341.11219894, 581114.22343102,
533815.30272698, 569289.49325501, 451042.1914949 ,
545640.03290299, 521990.57255097, 552734.87100859,
548004.97893819, 644967.76638148, 526720.46462137,
498341.11219894, 474691.65184692, 380093.81043884,
557464.763079 , 491246.27409334, 582533.19105214,
605563.03554294, 474691.65184692, 578749.27739582,
640237.87431107, 332794.88973479, 643785.29336388,
332794.88973479, 427392.73114288, 403743.27079086,
344619.6199108 , 545640.03290299, 611858.52188865,
521990.57255097, 585844.11550142, 501888.53125175,
529085.41065657, 380093.81043884, 616588.41395905,
595303.89964223, 295343.60432133, 537126.22717626,
525537.99160377, 503071.00426935, 498341.11219894,
613291.67918598, 581114.22343102, 414553.43911777,
607128.62981824, 604763.68378304, 537003.24998243,
380093.81043884, 597730.33427435, 451042.1914949 ,
380093.81043884, 408473.16286126, 525537.99160377,
521990.57255097, 261846.50867873, 590574.00757183,
481786.48995253, 578749.27739582, 604763.68378304,
479421.54391733, 380093.81043884, 550369.92497339,
380093.81043884, 521990.57255097, 521990.57255097,
451042.1914949 , 652062.60448708, 451042.1914949 ,
474691.65184692, 486516.38202293, 661049.39942085,
640237.87431107, 597668.84567744, 498341.11219894,
581114.22343102, 569289.49325501, 403743.27079086,
628413.14413506, 519429.33599484, 403743.27079086,
621318.30602946, 427392.73114288, 521990.57255097,
475540.66747356, 474691.65184692, 552351.74975089,
190898.12762266, 427392.73114288, 521990.57255097,
380093.81043884, 569289.49325501, 648562.48435499,
592938.95360703])
In [47]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
In [49]:
mean_absolute_error(y_test, lin_pred)
Out[49]:
300629.95639712235
In [51]:
mean_squared_error(y_test, lin_pred)
Out[51]:
292829552069.2159
In [53]:
r2_score(y_test, lin_pred)
Out[53]:
0.04043884538588105
In [55]:
import matplotlib.pyplot as plt
In [57]:
plt.figure()
plt.hist(data_assign['KM_Driven'])
plt.xlabel("KM Driven")
plt.ylabel("Number of Cars")
plt.title("Distribution of KM Driven Values")
plt.show()
In [59]:
plt.pie([y_test.sum(), lin_pred.sum()],labels=["Actual Selling Price", "Predicted Selling Price"])
plt.title("Actual vs Predicted Selling Price")
plt.figure()
plt.show()
<Figure size 640x480 with 0 Axes>
In [61]:
actual_avg = y_test.mean()
predicted_avg = lin_pred.mean()
plt.figure()
plt.bar(["Actual Price", "Predicted Price"], [actual_avg, predicted_avg])
plt.xlabel("Price Type")
plt.ylabel("Average Selling Price")
plt.title("Comparison of Average Actual and Predicted Selling Prices")
plt.show()
Decision Tree Regressions
In [63]:
from sklearn.preprocessing import LabelEncoder
In [65]:
fuel_encoder = LabelEncoder()
data_assign["Fuel_Encoded"] = fuel_encoder.fit_transform(data_assign["Fuel"])
In [67]:
X = data_assign[['Fuel_Encoded']]
y = data_assign['Selling_Price']
In [69]:
from sklearn.model_selection import train_test_split
In [71]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size= 0.2, random_state=42)
In [73]:
from sklearn.tree import DecisionTreeClassifier
In [75]:
model_tree = DecisionTreeClassifier()
In [77]:
model_tree
Out[77]:
DecisionTreeClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier()
In [79]:
model_tree.fit(X_train, y_train)
Out[79]:
DecisionTreeClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier()
In [81]:
y_pred = model_tree.predict(X_test)
In [83]:
y_pred
Out[83]:
array([450000, 300000, 300000, 450000, 450000, 300000, 300000, 300000,
450000, 300000, 450000, 450000, 450000, 450000, 300000, 300000,
450000, 450000, 300000, 450000, 450000, 450000, 300000, 450000,
450000, 450000, 450000, 300000, 300000, 300000, 300000, 300000,
450000, 450000, 300000, 450000, 450000, 300000, 300000, 300000,
450000, 300000, 450000, 450000, 450000, 450000, 300000, 450000,
450000, 450000, 450000, 300000, 300000, 300000, 300000, 450000,
450000, 450000, 300000, 300000, 300000, 450000, 300000, 450000,
300000, 300000, 300000, 300000, 450000, 450000, 450000, 300000,
300000, 450000, 450000, 450000, 450000, 450000, 300000, 450000,
450000, 300000, 300000, 300000, 300000, 450000, 450000, 300000,
300000, 300000, 300000, 300000, 300000, 300000, 300000, 450000,
450000, 450000, 300000, 450000, 300000, 450000, 450000, 300000,
450000, 450000, 300000, 450000, 300000, 450000, 80000, 300000,
450000, 300000, 300000, 450000, 80000, 300000, 450000, 450000,
450000, 450000, 450000, 450000, 450000, 300000, 300000, 450000,
300000, 300000, 450000, 450000, 450000, 300000, 300000, 450000,
450000, 300000, 300000, 300000, 300000, 300000, 450000, 300000,
450000, 300000, 450000, 450000, 450000, 450000, 300000, 365000,
450000, 300000, 450000, 300000, 450000, 300000, 300000, 450000,
450000, 450000, 450000, 300000, 300000, 450000, 450000, 450000,
300000, 300000, 450000, 300000, 300000, 300000, 450000, 450000,
300000, 450000, 300000, 450000, 450000, 450000, 450000, 300000,
300000, 450000, 300000, 450000, 300000, 450000, 450000, 300000,
450000, 300000, 300000, 300000, 450000, 300000, 450000, 300000,
300000, 450000, 450000, 450000, 300000, 450000, 300000, 300000,
300000, 450000, 300000, 450000, 450000, 300000, 300000, 300000,
300000, 450000, 300000, 300000, 450000, 300000, 300000, 300000,
450000, 450000, 450000, 450000, 450000, 450000, 300000, 300000,
450000, 300000, 300000, 450000, 300000, 300000, 450000, 450000,
450000, 300000, 450000, 300000, 300000, 450000, 450000, 300000,
450000, 300000, 300000, 450000, 300000, 300000, 450000, 300000,
450000, 300000, 300000, 300000, 450000, 300000, 300000, 300000,
450000, 450000, 450000, 300000, 300000, 300000, 450000, 450000,
300000, 300000, 450000, 450000, 450000, 450000, 450000, 450000,
300000, 450000, 450000, 450000, 300000, 300000, 450000, 450000,
300000, 300000, 300000, 300000, 300000, 300000, 300000, 450000,
300000, 300000, 300000, 450000, 450000, 300000, 300000, 450000,
300000, 450000, 300000, 300000, 450000, 450000, 450000, 450000,
300000, 450000, 450000, 300000, 450000, 450000, 450000, 300000,
450000, 450000, 450000, 300000, 300000, 300000, 450000, 450000,
450000, 300000, 450000, 450000, 450000, 450000, 300000, 300000,
300000, 450000, 300000, 300000, 450000, 300000, 300000, 450000,
300000, 450000, 450000, 450000, 300000, 300000, 300000, 450000,
450000, 300000, 300000, 450000, 450000, 450000, 450000, 300000,
300000, 450000, 450000, 300000, 300000, 300000, 300000, 450000,
450000, 450000, 450000, 450000, 300000, 450000, 450000, 300000,
450000, 450000, 450000, 300000, 450000, 450000, 300000, 450000,
300000, 450000, 450000, 450000, 450000, 450000, 450000, 300000,
450000, 450000, 300000, 450000, 450000, 300000, 300000, 450000,
450000, 300000, 450000, 450000, 450000, 300000, 300000, 450000,
450000, 300000, 450000, 450000, 450000, 365000, 450000, 300000,
300000, 300000, 450000, 450000, 450000, 300000, 300000, 300000,
300000, 450000, 300000, 300000, 450000, 450000, 450000, 450000,
300000, 300000, 450000, 300000, 450000, 450000, 450000, 450000,
300000, 450000, 300000, 450000, 300000, 300000, 300000, 450000,
450000, 450000, 450000, 300000, 300000, 450000, 300000, 300000,
300000, 450000, 450000, 300000, 450000, 300000, 450000, 300000,
300000, 450000, 300000, 450000, 450000, 300000, 450000, 450000,
80000, 450000, 300000, 450000, 300000, 450000, 300000, 450000,
80000, 450000, 300000, 450000, 300000, 300000, 300000, 450000,
300000, 300000, 450000, 300000, 300000, 300000, 450000, 450000,
300000, 450000, 450000, 450000, 300000, 300000, 450000, 450000,
365000, 450000, 450000, 450000, 450000, 300000, 450000, 300000,
300000, 450000, 300000, 300000, 450000, 300000, 450000, 450000,
450000, 300000, 450000, 300000, 450000, 450000, 450000, 300000,
365000, 300000, 300000, 450000, 300000, 450000, 450000, 300000,
450000, 300000, 450000, 300000, 450000, 450000, 450000, 450000,
450000, 450000, 300000, 450000, 300000, 450000, 300000, 300000,
300000, 450000, 450000, 300000, 300000, 450000, 450000, 300000,
450000, 450000, 300000, 300000, 450000, 300000, 450000, 300000,
300000, 300000, 450000, 300000, 450000, 300000, 450000, 450000,
300000, 450000, 300000, 300000, 300000, 450000, 450000, 450000,
450000, 300000, 300000, 450000, 450000, 450000, 450000, 450000,
300000, 300000, 300000, 300000, 300000, 300000, 300000, 450000,
300000, 300000, 300000, 450000, 450000, 300000, 450000, 300000,
300000, 300000, 300000, 300000, 450000, 300000, 450000, 450000,
450000, 450000, 300000, 300000, 450000, 450000, 450000, 450000,
300000, 450000, 300000, 300000, 300000, 450000, 450000, 450000,
450000, 300000, 450000, 365000, 450000, 80000, 450000, 300000,
450000, 300000, 300000, 450000, 300000, 450000, 300000, 300000,
450000, 300000, 300000, 450000, 450000, 300000, 300000, 300000,
300000, 450000, 450000, 450000, 450000, 300000, 300000, 450000,
300000, 300000, 300000, 450000, 300000, 300000, 300000, 300000,
300000, 300000, 300000, 300000, 300000, 450000, 300000, 300000,
300000, 450000, 300000, 300000, 300000, 450000, 300000, 450000,
300000, 300000, 450000, 450000, 450000, 450000, 300000, 450000,
450000, 450000, 300000, 300000, 450000, 450000, 450000, 450000,
300000, 450000, 300000, 450000, 300000, 450000, 450000, 450000,
300000, 450000, 300000, 300000, 450000, 450000, 450000, 450000,
450000, 300000, 450000, 450000, 450000, 300000, 300000, 300000,
300000, 450000, 300000, 450000, 450000, 450000, 300000, 450000,
300000, 300000, 450000, 300000, 450000, 300000, 300000, 450000,
300000, 300000, 300000, 450000, 300000, 300000, 450000, 450000,
300000, 300000, 450000, 300000, 450000, 300000, 300000, 450000,
450000, 450000, 450000, 450000, 300000, 450000, 450000, 300000,
300000, 300000, 300000, 300000, 450000, 450000, 300000, 300000,
300000, 450000, 450000, 300000, 450000, 300000, 300000, 300000,
300000, 300000, 300000, 300000, 450000, 300000, 300000, 450000,
300000, 450000, 300000, 450000, 300000, 300000, 300000, 300000,
450000, 450000, 300000, 300000, 450000, 300000, 300000, 450000,
300000, 300000, 450000, 450000, 450000, 450000, 300000, 300000,
450000, 300000, 300000, 450000, 450000, 450000, 300000, 300000,
450000, 450000, 450000, 450000, 300000, 450000, 450000, 450000,
300000, 300000, 450000, 450000, 300000, 450000, 450000, 300000,
300000, 300000, 300000, 450000, 450000, 300000, 450000, 300000,
450000, 450000, 300000, 300000, 450000, 450000, 300000, 300000,
450000, 300000, 450000, 300000, 450000, 450000, 300000, 300000,
450000, 450000, 300000, 300000], dtype=int64)
In [85]:
mean_absolute_error(y_test, y_pred)
Out[85]:
273683.28686635947
In [87]:
mean_squared_error(y_test, y_pred)
Out[87]:
302642379882.89056
In [89]:
r2_score(y_test, y_pred)
Out[89]:
0.008283592200596268
In [91]:
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
In [93]:
plot_tree(model_tree);
In [95]:
plt.figure(figsize=(40,20), dpi=800)
plot_tree(
model_tree,
feature_names=["Fuel_Encoded"],
filled=True,
rounded=True,
fontsize=9,
max_depth=2
)
plt.savefig('Course_work_image')
plt.show()